#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

from pymongo import MongoClient
import sys
from tqdm import tqdm

# Make UTF-8 the interpreter-wide default string encoding when it is not
# already. This is a Python 2 idiom: sys must be reloaded before
# setdefaultencoding() can be called, so the order of the two calls matters.
default_encoding = 'utf-8'
needs_encoding_override = sys.getdefaultencoding() != default_encoding
if needs_encoding_override:
    reload(sys)
    sys.setdefaultencoding(default_encoding)

# Connection settings for the MongoDB collection this script rewrites.
MONGODB_HOST = 'localhost'
MONGODB_PORT = 27017
MONGODB_DB_NAME = 'Dictionary'
MONGODB_COLLECTION_NAME = 'GeneOntology_20160504'

# Open the client, then resolve the database and collection handles that the
# rest of the script uses.
connection = MongoClient(host=MONGODB_HOST, port=MONGODB_PORT)
db = connection.get_database(MONGODB_DB_NAME)
collection = db.get_collection(MONGODB_COLLECTION_NAME)

# For every document in the collection, derive two fields and store them back
# on the same document:
#   dms_name    - copy of the document's 'name' (only when 'name' exists)
#   dms_synonym - the name (if any) followed by each 'synonym' entry, keeping
#                 only the text before the first '%split%' marker
# Documents that yield neither field are left untouched.
# try/finally ensures the client is closed even if the loop fails part-way.
try:
    for result in tqdm(collection.find()):
        dataset = {}
        dms_synonym = []
        doc_id = result['_id']  # avoid shadowing the builtin id()

        # 'in' replaces dict.has_key(), which no longer exists on Python 3.
        if 'name' in result:
            dataset['dms_name'] = result['name']
            dms_synonym.append(result['name'])

        if 'synonym' in result:
            # Keep only the part of each entry before the '%split%' marker.
            dms_synonym.extend(
                item.split('%split%')[0] for item in result['synonym']
            )

        if dms_synonym:
            dataset['dms_synonym'] = dms_synonym

        # Skip the write entirely when there is nothing to set.
        if dataset:
            collection.update_one({'_id': doc_id}, {'$set': dataset})
finally:
    connection.close()
